Source: https://a816-health.nyc.gov/hdi/epiquery/visualizations?PageType=ps&PopulationSource=Syndromic
Respiratory includes ED chief complaint mention of bronchitis, chest cold, chest congestion, chest pain, cough, difficulty breathing, pneumonia, shortness of breath, and upper respiratory infection.
ILI includes ED chief complaint mention of flu, fever, and sore throat.
resp_daily_synd_surveil_raw_data =
read.csv("../Down_Data/respiratory_daily_syndrome_surveillance_16_20_NYC_no_data_note_csv.csv")
head(resp_daily_synd_surveil_raw_data)
## Ind1Name Dim1Name Dim1Value Dim2Name Dim2Value Date
## 1 Respiratory Borough Bronx Age Group All age groups 1/1/16
## 2 Respiratory Borough Bronx Age Group All age groups 1/2/16
## 3 Respiratory Borough Bronx Age Group All age groups 1/3/16
## 4 Respiratory Borough Bronx Age Group All age groups 1/4/16
## 5 Respiratory Borough Bronx Age Group All age groups 1/5/16
## 6 Respiratory Borough Bronx Age Group All age groups 1/6/16
## Select.Metric X
## 1 Count 307
## 2 Count 366
## 3 Count 316
## 4 Count 363
## 5 Count 302
## 6 Count 339
resp_daily_synd_surveil_clean_data =
resp_daily_synd_surveil_raw_data %>%
dplyr::select(Date = Date,
Syndrome = Ind1Name,
Borough = Dim1Value,
Age_Group = Dim2Value,
Count = X) %>%
filter(Borough == "Citywide") %>%
filter(Age_Group == "All age groups") %>%
mutate(Adj_Date = mdy(Date)) %>%
mutate(Adj_Count = str_replace(string = Count,
pattern = ',',replacement = '')) %>%
mutate(Adj_Count = as.numeric(Adj_Count)) %>%
dplyr::select(Date = Adj_Date, Syndrome,
Count = Adj_Count)
p = ggplot(data = resp_daily_synd_surveil_clean_data,
aes(x = Date, y = Count)) + geom_point() +
geom_line() + rahul_man_figure_theme
p
p = ggplot(data = resp_daily_synd_surveil_clean_data,
aes(x = Date, y = Count)) + geom_point() +
rahul_man_figure_theme
p
resp_daily_synd_surveil_clean_data = resp_daily_synd_surveil_clean_data %>%
mutate(Day_in_Year = yday(Date),
Year = year(Date))
p = ggplot(data = resp_daily_synd_surveil_clean_data,
aes(x = Day_in_Year, y = Count,
color = as.character(Year))) + geom_point() +
rahul_man_figure_theme
p
write.csv(resp_daily_synd_surveil_clean_data,
file = "../Generated_Data/resp_daily_synd_surveil_clean_data.csv",
row.names = FALSE)
Repeat the analysis using weekly syndrome surveillance data instead of daily (that way it is the same frequency as the confirmed influenza case counts). We assume that “2016 Week 1” refers to the week from Jan 1st-Jan2nd. We therefore start from the second week.
If first day of the year falls on day 4 or earlier, the first week
resp_weekly_synd_surveil_raw_data =
read.csv("../Down_Data/respiratory_weekly_syndrome_surveillance_16_20_NYC_no_data_note_csv.csv")
head(resp_weekly_synd_surveil_raw_data)
## Ind1Name Dim1Name Dim1Value Dim2Name Dim2Value Date
## 1 Respiratory Borough Bronx Age Group All age groups 2016 wk1
## 2 Respiratory Borough Bronx Age Group All age groups 2016 wk2
## 3 Respiratory Borough Bronx Age Group All age groups 2016 wk3
## 4 Respiratory Borough Bronx Age Group All age groups 2016 wk4
## 5 Respiratory Borough Bronx Age Group All age groups 2016 wk5
## 6 Respiratory Borough Bronx Age Group All age groups 2016 wk6
## Select.Metric X
## 1 Count 673
## 2 Count 2,210
## 3 Count 2,073
## 4 Count 1,859
## 5 Count 2,031
## 6 Count 2,045
resp_weekly_synd_surveil_clean_data =
resp_weekly_synd_surveil_raw_data %>%
dplyr::select(Date = Date,
Syndrome = Ind1Name,
Borough = Dim1Value,
Age_Group = Dim2Value,
Count = X) %>%
filter(Borough == "Citywide") %>%
filter(Age_Group == "All age groups") %>%
separate(Date, c("Year", "Hosp_Week"), " wk") %>%
mutate(First_Date_in_Year =
mdy(paste0("01/01/",Year))) %>%
mutate(Day_of_Week_of_First_Date_in_Year =
wday(First_Date_in_Year)) %>%
mutate(Hosp_Week = as.numeric(Hosp_Week),
Count_second_week_as_first_epi_week = as.numeric(Day_of_Week_of_First_Date_in_Year > 4)) %>%
mutate(Days_To_Count_For_First_Week_If_in_Epi_year = 7-Day_of_Week_of_First_Date_in_Year ) %>%
mutate(Week_Ending_in_Date = First_Date_in_Year + days(Days_To_Count_For_First_Week_If_in_Epi_year) +
weeks(Hosp_Week-1)) %>%
mutate(Week_Ending_in_Date = as.Date(Week_Ending_in_Date)) %>%
mutate(Hosp_Week = as.numeric(Hosp_Week)) %>%
mutate(Adj_Count = str_replace(string = Count,
pattern = ',',replacement = '')) %>%
mutate(Adj_Count = as.numeric(Adj_Count)) %>%
dplyr::select(Week_Ending_in_Date,
Syndrome, Year, Hosp_Week,
Count = Adj_Count)
p = ggplot(data = resp_weekly_synd_surveil_clean_data,
aes(x = Week_Ending_in_Date,
y = Count)) +
geom_point() +
rahul_man_figure_theme
p
write.csv(resp_weekly_synd_surveil_clean_data,
file =
"../Generated_Data/weekly_resp_synd_surv_NYC_clean.csv",
row.names = FALSE)
# p = ggplot(data =
# resp_weekly_synd_surveil_clean_data_weeks_2_thru_52,
# aes(x = Week_Ending_in_Date,
# y = Count)) +
# geom_point() +
# rahul_man_figure_theme
# p
#
# write.csv(
# resp_weekly_synd_surveil_clean_data_weeks_2_thru_52,
# file =
# "../Generated_Data/weekly_resp_synd_surv_NYC_clean_weeks_2_thru_52.csv",
# row.names = FALSE)
resp_weekly_synd_surveil_calib_period =
resp_weekly_synd_surveil_clean_data %>%
filter(Year < 2020) %>%
filter(Hosp_Week <= 21)
head(resp_weekly_synd_surveil_calib_period)
## Week_Ending_in_Date Syndrome Year Hosp_Week Count
## 1 2016-01-02 Respiratory 2016 1 2962
## 2 2016-01-09 Respiratory 2016 2 9555
## 3 2016-01-16 Respiratory 2016 3 8982
## 4 2016-01-23 Respiratory 2016 4 8168
## 5 2016-01-30 Respiratory 2016 5 9093
## 6 2016-02-06 Respiratory 2016 6 9261
p = ggplot(data =
resp_weekly_synd_surveil_calib_period,
aes(x = Week_Ending_in_Date,
y = Count)) +
geom_point() +
rahul_man_figure_theme
p
p = ggplot(data =
resp_weekly_synd_surveil_calib_period,
aes(x = Hosp_Week,
y = Count,
color = as.factor(Year))) +
geom_point() + geom_line(aes(group = as.factor(Year))) +
rahul_man_figure_theme
p
resp_weekly_synd_surveil_calib_period =
resp_weekly_synd_surveil_calib_period %>%
dplyr::select(Week_Ending_in_Date,
Syndrome,
Year,
Hosp_Week,
Resp_Synd_Surveil_Count = Count)
resp_weekly_synd_surveil_calib_period =
resp_weekly_synd_surveil_calib_period[order(resp_weekly_synd_surveil_calib_period$Week_Ending_in_Date),]
write.csv(
resp_weekly_synd_surveil_calib_period,
file =
"../Generated_Data/weekly_resp_synd_surv_NYC_calib_period.csv",
row.names = FALSE)
raw_flu_confirmed_cases_NY_state =
read.csv("../Down_Data/Influenza_Laboratory-Confirmed_Cases_By_County__Beginning_2009-10_Season.csv")
head(raw_flu_confirmed_cases_NY_state)
## Season Region County CDC.Week Week.Ending.Date Disease Count
## 1 2010-2011 NYC NEW YORK 46 11/20/2010 INFLUENZA_A 15
## 2 2010-2011 NYC NEW YORK 50 12/18/2010 INFLUENZA_B 0
## 3 2010-2011 NYC RICHMOND 41 10/16/2010 INFLUENZA_A 0
## 4 2011-2012 NYC KINGS 42 10/22/2011 INFLUENZA_A 0
## 5 2012-2013 WESTERN SENECA 14 04/06/2013 INFLUENZA_A 0
## 6 2014-2015 WESTERN WAYNE 3 01/24/2015 INFLUENZA_B 2
## County.Centroid FIPS
## 1 (40.7831, -73.9712) 36061
## 2 (40.7831, -73.9712) 36061
## 3 (40.5795, -74.1502) 36085
## 4 (40.6782, -73.9442) 36047
## 5 (42.7652, -76.8721) 36099
## 6 (43.202, -77.0104) 36117
NYC_region_confirmed_cases_NY_state =
raw_flu_confirmed_cases_NY_state %>%
filter(Region == "NYC") %>%
group_by(Week.Ending.Date, CDC.Week) %>%
summarize(Count = sum(Count)) %>%
as.data.frame() %>%
mutate(Week_Ending_in_Date = as.Date(mdy(Week.Ending.Date))) %>%
mutate(Year = year(Week_Ending_in_Date)) %>%
mutate(CDC_Week = CDC.Week) %>%
mutate(Week = CDC_Week)
head(NYC_region_confirmed_cases_NY_state)
## Week.Ending.Date CDC.Week Count Week_Ending_in_Date Year CDC_Week Week
## 1 01/01/2011 52 479 2011-01-01 2011 52 52
## 2 01/02/2010 52 201 2010-01-02 2010 52 52
## 3 01/02/2016 52 31 2016-01-02 2016 52 52
## 4 01/03/2015 53 601 2015-01-03 2015 53 53
## 5 01/04/2014 1 619 2014-01-04 2014 1 1
## 6 01/04/2020 1 5343 2020-01-04 2020 1 1
head(NYC_region_confirmed_cases_NY_state)
## Week.Ending.Date CDC.Week Count Week_Ending_in_Date Year CDC_Week Week
## 1 01/01/2011 52 479 2011-01-01 2011 52 52
## 2 01/02/2010 52 201 2010-01-02 2010 52 52
## 3 01/02/2016 52 31 2016-01-02 2016 52 52
## 4 01/03/2015 53 601 2015-01-03 2015 53 53
## 5 01/04/2014 1 619 2014-01-04 2014 1 1
## 6 01/04/2020 1 5343 2020-01-04 2020 1 1
NYC_county_list = c("Brooklyn", "Kings", "Queens", "Bronx", "Richmond","New York")
p = ggplot(data = NYC_region_confirmed_cases_NY_state,
aes(x = Week_Ending_in_Date, y = Count)) +
geom_point() +
rahul_man_figure_theme
p
p = ggplot(data = NYC_region_confirmed_cases_NY_state,
aes(x = Week, y = Count,
color = as.character(Year))) +
geom_point() +
rahul_man_figure_theme
p
write.csv(
NYC_region_confirmed_cases_NY_state,
file =
"../Generated_Data/confirmed_weekly_flu_cases_NYC_counties.csv",
row.names = FALSE)
NYC_region_confirmed_flucases_NY_state_calib_years =
NYC_region_confirmed_cases_NY_state %>%
filter(Year > 2015) %>%
filter(Year < 2020) %>%
filter(Week < 40) %>%
dplyr::select(Week_Ending_in_Date,
CDC_Week,
Confirmed_Flu_Cases = Count,
Year = Year,
Week = Week)
p = ggplot(
data = NYC_region_confirmed_flucases_NY_state_calib_years,
aes(x = Week_Ending_in_Date, y = Confirmed_Flu_Cases)) +
geom_point() +
rahul_man_figure_theme
p
p = ggplot(
data = NYC_region_confirmed_flucases_NY_state_calib_years,
aes(x = Week, y = Confirmed_Flu_Cases,
color = as.character(Year))) +
geom_point() +
rahul_man_figure_theme
p
NYC_region_confirmed_flucases_NY_state_calib_years =
NYC_region_confirmed_flucases_NY_state_calib_years[order(NYC_region_confirmed_flucases_NY_state_calib_years$Week_Ending_in_Date),]
write.csv(NYC_region_confirmed_flucases_NY_state_calib_years,
file =
"../Generated_Data/confirmed_weekly_flu_cases_NYC_counties_calib_years.csv",
row.names = FALSE)
Calibration Period: Weeks 2-20 from 2016-2019
confirmed_flu_cases_calib =
read.csv(
file =
"../Generated_Data/confirmed_weekly_flu_cases_NYC_counties_calib_years.csv",
header = TRUE, stringsAsFactors = FALSE)
head(confirmed_flu_cases_calib)
## Week_Ending_in_Date CDC_Week Confirmed_Flu_Cases Year Week
## 1 2016-01-09 1 63 2016 1
## 2 2016-01-16 2 89 2016 2
## 3 2016-01-23 3 162 2016 3
## 4 2016-01-30 4 214 2016 4
## 5 2016-02-06 5 429 2016 5
## 6 2016-02-13 6 813 2016 6
resp_weekly_synd_surveil_calib_period =
read.csv(
file =
"../Generated_Data/weekly_resp_synd_surv_NYC_calib_period.csv",
header = TRUE, stringsAsFactors = FALSE)
head(resp_weekly_synd_surveil_calib_period)
## Week_Ending_in_Date Syndrome Year Hosp_Week Resp_Synd_Surveil_Count
## 1 2016-01-02 Respiratory 2016 1 2962
## 2 2016-01-09 Respiratory 2016 2 9555
## 3 2016-01-16 Respiratory 2016 3 8982
## 4 2016-01-23 Respiratory 2016 4 8168
## 5 2016-01-30 Respiratory 2016 5 9093
## 6 2016-02-06 Respiratory 2016 6 9261
calib_period_data = join(resp_weekly_synd_surveil_calib_period,
confirmed_flu_cases_calib)
## Joining by: Week_Ending_in_Date, Year
head(calib_period_data)
## Week_Ending_in_Date Syndrome Year Hosp_Week Resp_Synd_Surveil_Count
## 1 2016-01-02 Respiratory 2016 1 2962
## 2 2016-01-09 Respiratory 2016 2 9555
## 3 2016-01-16 Respiratory 2016 3 8982
## 4 2016-01-23 Respiratory 2016 4 8168
## 5 2016-01-30 Respiratory 2016 5 9093
## 6 2016-02-06 Respiratory 2016 6 9261
## CDC_Week Confirmed_Flu_Cases Week
## 1 NA NA NA
## 2 1 63 1
## 3 2 89 2
## 4 3 162 3
## 5 4 214 4
## 6 5 429 5
#Remove first week of hosp data (may not have 7 full days)
calib_period_data = calib_period_data %>%
filter(Hosp_Week > 1) %>%
filter(Hosp_Week < 21) %>%
filter(Year != 2018)
no_flu_cases_dates = calib_period_data %>%
filter(is.na(Confirmed_Flu_Cases) == TRUE)
p = ggplot(data = calib_period_data,
aes(x = Confirmed_Flu_Cases,
y = Resp_Synd_Surveil_Count)) +
geom_point() + rahul_man_figure_theme
p
p = ggplot(data = calib_period_data,
aes(x = Confirmed_Flu_Cases,
y = Resp_Synd_Surveil_Count,
color = as.factor(Year))) +
geom_point() + rahul_man_figure_theme
p
There were 2962 resp syndrome reports during the first week of January 2016. This is much lower than all other dates in this period. For comparison, 9,555 reports were obtained in the next week. The first week is about 31% of the second week. This makes sense if the reporting period for the first week is only from Jan 1st (Friday) to Jan 2nd, 2016 (Saturday), about 28.6% of the size of a regular week. Therefore, we will record the resp reports as occurring on the raw week (not CDC adjusted week) and ignore the first week’s reports for comparison.
You can see this in the plots of the first week of every year-all except 2017 are lower than the following week, and 2017’s first day was on a Sunday.
lm_resp_weekly_vs_flu_calib =
lm(calib_period_data$Resp_Synd_Surveil_Count ~
calib_period_data$Confirmed_Flu_Cases)
summary(lm_resp_weekly_vs_flu_calib)
##
## Call:
## lm(formula = calib_period_data$Resp_Synd_Surveil_Count ~ calib_period_data$Confirmed_Flu_Cases)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1363.2 -670.5 -129.5 626.5 2870.2
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 8283.1018 186.6424 44.380
## calib_period_data$Confirmed_Flu_Cases 0.8134 0.1215 6.695
## Pr(>|t|)
## (Intercept) < 2e-16 ***
## calib_period_data$Confirmed_Flu_Cases 1.19e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 914.2 on 55 degrees of freedom
## Multiple R-squared: 0.4491, Adjusted R-squared: 0.439
## F-statistic: 44.83 on 1 and 55 DF, p-value: 1.189e-08
lm_coef = lm_resp_weekly_vs_flu_calib$coefficients
resp_synd_reports_per_flu_case =
lm_coef['calib_period_data$Confirmed_Flu_Cases'] %>%
as.numeric()
baseline_resp_with_no_flu = lm_coef['(Intercept)'] %>%
as.numeric()
prop_of_var_in_resp_explained_by_flu =
summary(lm_resp_weekly_vs_flu_calib)$adj.r.squared
resp_synd_reports_per_flu_case
## [1] 0.8134033
baseline_resp_with_no_flu
## [1] 8283.102
prop_of_var_in_resp_explained_by_flu
## [1] 0.4390346
Plot data overlaid with best fit line
small_breaks_flu_cases =
seq(from =
min(calib_period_data$Confirmed_Flu_Cases),
to = max(calib_period_data$Confirmed_Flu_Cases),
length = 10^3)
flu_linear_curve = baseline_resp_with_no_flu +
resp_synd_reports_per_flu_case*small_breaks_flu_cases
flu_linear_curve_df = data.frame(
small_breaks = small_breaks_flu_cases,
poly_curve = flu_linear_curve,
plot_var_label = "flu_cases")
plot_label = paste0(" S = ",
round(
resp_synd_reports_per_flu_case,
digits = 2),
" F + ",
round(
baseline_resp_with_no_flu,
digits = 2),
" \n R^2 = ",
round(
prop_of_var_in_resp_explained_by_flu,
digits = 2)
)
p = ggplot() +
geom_point(data = calib_period_data,
aes(x = Confirmed_Flu_Cases,
y = Resp_Synd_Surveil_Count)) +
scale_linetype_manual(values = c("blank", "solid")) +
rahul_man_figure_theme +
theme_white_background +
geom_line(data = flu_linear_curve_df,
aes(x = small_breaks, y = poly_curve),
color = 'red', show.legend = F) +
scale_x_continuous(
breaks = scales::pretty_breaks(n = 3)) +
theme(
aspect.ratio = 1,
strip.background = element_blank(),
strip.placement = "outside"
) +
theme(legend.position = "None") +
annotate(geom="text", x=2300, y=15500, label=plot_label,
color="black")
p
png("../Figures/Resp_syndrome_surveillance_vs_flu_cases_weeks_2_to_20_2016_thru_2019.png")
print(p)
dev.off()
## quartz_off_screen
## 2
residuals_from_removing_flu_effect =
lm_resp_weekly_vs_flu_calib$residuals
calib_period_data = calib_period_data %>%
mutate(residuals_from_removing_flu_effect = residuals_from_removing_flu_effect)
head(calib_period_data)
## Week_Ending_in_Date Syndrome Year Hosp_Week Resp_Synd_Surveil_Count
## 1 2016-01-09 Respiratory 2016 2 9555
## 2 2016-01-16 Respiratory 2016 3 8982
## 3 2016-01-23 Respiratory 2016 4 8168
## 4 2016-01-30 Respiratory 2016 5 9093
## 5 2016-02-06 Respiratory 2016 6 9261
## 6 2016-02-13 Respiratory 2016 7 9627
## CDC_Week Confirmed_Flu_Cases Week residuals_from_removing_flu_effect
## 1 1 63 1 1220.6537
## 2 2 89 2 626.5053
## 3 3 162 3 -246.8732
## 4 4 214 4 635.8299
## 5 5 429 5 628.9481
## 6 6 813 6 682.6013
p = ggplot(data = calib_period_data,
aes(x = as.Date(Week_Ending_in_Date),
y = residuals_from_removing_flu_effect)) +
geom_point() +
rahul_man_figure_theme
p
p = ggplot(data = calib_period_data,
aes(x = Week,
y = residuals_from_removing_flu_effect,
color = as.factor(Year))) +
geom_point() +
rahul_man_figure_theme
p
resid_2016_only = calib_period_data %>%
filter(Year == 2016)
acf(resid_2016_only$residuals_from_removing_flu_effect)
resid_2017_only = calib_period_data %>%
filter(Year == 2017)
acf(resid_2017_only$residuals_from_removing_flu_effect)
resid_poly_fit_model <- lm(
calib_period_data$residuals_from_removing_flu_effect ~ poly(
calib_period_data$Week,3, raw = TRUE))
summary(resid_poly_fit_model)
##
## Call:
## lm(formula = calib_period_data$residuals_from_removing_flu_effect ~
## poly(calib_period_data$Week, 3, raw = TRUE))
##
## Residuals:
## Min 1Q Median 3Q Max
## -1406.0 -486.2 -116.3 574.7 1982.2
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 1605.86604 609.06075 2.637
## poly(calib_period_data$Week, 3, raw = TRUE)1 -260.64370 231.31347 -1.127
## poly(calib_period_data$Week, 3, raw = TRUE)2 6.86706 24.48126 0.281
## poly(calib_period_data$Week, 3, raw = TRUE)3 0.08506 0.75668 0.112
## Pr(>|t|)
## (Intercept) 0.011 *
## poly(calib_period_data$Week, 3, raw = TRUE)1 0.265
## poly(calib_period_data$Week, 3, raw = TRUE)2 0.780
## poly(calib_period_data$Week, 3, raw = TRUE)3 0.911
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 763.8 on 53 degrees of freedom
## Multiple R-squared: 0.3273, Adjusted R-squared: 0.2893
## F-statistic: 8.597 on 3 and 53 DF, p-value: 9.541e-05
calib_period_data$Poly_Fit =
resid_poly_fit_model$fitted.values
small_breaks_week = seq(
from= min(calib_period_data$Week),
to = max(calib_period_data$Week),
length = 10^3)
resid_poly_intercept =summary(
resid_poly_fit_model)$coefficients[1,1]
resid_poly_order_1 = summary(
resid_poly_fit_model)$coefficients[2,1]
resid_poly_order_2 = summary(
resid_poly_fit_model)$coefficients[3,1]
resid_poly_order_3 = summary(
resid_poly_fit_model)$coefficients[4,1]
prop_of_var_in_resid_explained_by_poly =
summary(resid_poly_fit_model)$adj.r.squared
resid_poly_curve = resid_poly_intercept +
resid_poly_order_1*small_breaks_week +
resid_poly_order_2*I(small_breaks_week^2) +
resid_poly_order_3*I(small_breaks_week^3)
resid_poly_curve_df = data.frame(
small_breaks = small_breaks_week,
resid_poly_curve = resid_poly_curve)
resid_poly_plot_label = paste0(" resid = ",
round(
resid_poly_order_3,
digits = 2),
" Week^3 + \n ",
round(
resid_poly_order_2,
digits = 2),
" Week^2 + \n ",
round(
resid_poly_order_1,
digits = 2),
" Week + \n ",
round(
resid_poly_intercept,
digits = 2),
" \n R^2 = ",
round(
prop_of_var_in_resid_explained_by_poly,
digits = 2)
)
p = ggplot() +
geom_point(data = calib_period_data,
aes(x = Week,
y = residuals_from_removing_flu_effect)) +
geom_line(data = resid_poly_curve_df,
aes(x = small_breaks_week,
y = resid_poly_curve),
color = 'red', show.legend = F) +
rahul_man_figure_theme +
theme_white_background +
scale_x_continuous(
breaks = scales::pretty_breaks(n = 3)) +
theme(
aspect.ratio = 1,
strip.background = element_blank(),
strip.placement = "outside"
) +
theme(legend.position = "None") +
annotate(geom="text", x=12, y=2000,
label=resid_poly_plot_label,
color="black")
p
png("../Figures/Resp_syndrome_surveillance_vs_flu_cases_weeks_2_to_20_poly_fit_of_residuals.png")
print(p)
dev.off()
## quartz_off_screen
## 2
p
residuals_from_removing_resid_poly =
resid_poly_fit_model$residuals
calib_period_data = calib_period_data %>%
mutate(residuals_from_removing_resid_poly = residuals_from_removing_resid_poly)
head(calib_period_data)
## Week_Ending_in_Date Syndrome Year Hosp_Week Resp_Synd_Surveil_Count
## 1 2016-01-09 Respiratory 2016 2 9555
## 2 2016-01-16 Respiratory 2016 3 8982
## 3 2016-01-23 Respiratory 2016 4 8168
## 4 2016-01-30 Respiratory 2016 5 9093
## 5 2016-02-06 Respiratory 2016 6 9261
## 6 2016-02-13 Respiratory 2016 7 9627
## CDC_Week Confirmed_Flu_Cases Week residuals_from_removing_flu_effect
## 1 1 63 1 1220.6537
## 2 2 89 2 626.5053
## 3 3 162 3 -246.8732
## 4 4 214 4 635.8299
## 5 5 429 5 628.9481
## 6 6 813 6 682.6013
## Poly_Fit residuals_from_removing_resid_poly
## 1 1352.1745 -131.52070
## 2 1112.7273 -486.22205
## 3 888.0350 -1134.90813
## 4 678.6077 -42.77786
## 5 484.9559 143.99224
## 6 307.5899 375.01140
p = ggplot(data = calib_period_data,
aes(x = Week,
y = residuals_from_removing_resid_poly)) +
geom_point() +
rahul_man_figure_theme
p
p = ggplot(data = calib_period_data,
aes(x = Week,
y = residuals_from_removing_resid_poly,
color = as.factor(Year))) +
geom_point() +
rahul_man_figure_theme
p
qqnorm(calib_period_data$residuals_from_removing_resid_poly, pch = 1, frame = FALSE)
qqline(calib_period_data$residuals_from_removing_resid_poly, col = "steelblue", lwd = 2)
mean_resid = mean(calib_period_data$residuals_from_removing_resid_poly)
sd_resid = sd(calib_period_data$residuals_from_removing_resid_poly)
mean_resid
## [1] 5.684342e-14
sd_resid
## [1] 743.0444
residual_standard_eror = sqrt(deviance(resid_poly_fit_model)/df.residual(resid_poly_fit_model))
sigma_epsilon = residual_standard_eror
sigma_epsilon
## [1] 763.7845
Let \(G(w)\) be the number of non-COVID respiratory syndrome cases presenting in the ED of hospitals in NYC in week \(w\) of year \(y\).
\[\begin{equation} G(w) = g_0 + g_{\text{F}} F(w,y) + \beta_{\text{w}^2}w^2 + \beta_{\text{w}^1}w + \beta_{\text{w}^0} + \epsilon \end{equation}\]
where \(F(w,y)\) is the number of confirmed flu cases in NYC, and:
\[\begin{equation} \epsilon \sim rnorm(0,\sigma_{\epsilon}) \end{equation}\]
Values of other model coefficients:
g_F = resp_synd_reports_per_flu_case
g_0 = baseline_resp_with_no_flu
Beta_w_3 = resid_poly_order_3
Beta_w_2 = resid_poly_order_2
Beta_w_1 = resid_poly_order_1
Beta_w_0 = resid_poly_intercept
g_F
## [1] 0.8134033
g_0
## [1] 8283.102
Beta_w_3
## [1] 0.0850551
Beta_w_2
## [1] 6.86706
Beta_w_1
## [1] -260.6437
Beta_w_0
## [1] 1605.866
sigma_epsilon
## [1] 763.7845
fitted_NC_model_params = data.frame(g_F = g_F, g_0 =g_0,
Beta_w_3 = Beta_w_3,
Beta_w_2 = Beta_w_2,
Beta_w_1 = Beta_w_1,
Beta_w_0 = Beta_w_0,
sigma_epsilon = sigma_epsilon)
write.csv(fitted_NC_model_params,
file = "../Generated_Data/fitted_NC_model_params.csv",
append = FALSE,
row.names = FALSE)
## Warning in write.csv(fitted_NC_model_params, file = "../Generated_Data/
## fitted_NC_model_params.csv", : attempt to set 'append' ignored
NYC_region_confirmed_flucases_NY_state_2020 =
NYC_region_confirmed_cases_NY_state %>%
filter(Year == 2020) %>%
filter(Week < 21) %>%
filter(Week > 1) %>%
dplyr::select(Week_Ending_in_Date,
CDC_Week,
Confirmed_Flu_Cases = Count,
Year = Year,
Week = Week)
w_pred = seq(from = 2, to = 15, by = 1)
G_w_2020_pred = g_0 +
g_F*NYC_region_confirmed_flucases_NY_state_2020$Confirmed_Flu_Cases + Beta_w_3*w_pred^3 + Beta_w_2*w_pred^2 + Beta_w_1*w_pred
G_w_2020_pred = G_w_2020_pred + Beta_w_0
G_w_2020_pred_df = data.frame(G_w_2020_pred,
w_pred)
p = ggplot(data = G_w_2020_pred_df,
aes(x = w_pred,
y = G_w_2020_pred)) +
geom_point() + geom_line() +
rahul_man_figure_theme +
theme_white_background
p
resp_weekly_synd_surveil_2020 =
resp_weekly_synd_surveil_clean_data %>%
filter(Year == 2020) %>%
filter(Hosp_Week <= 20) %>%
filter(Hosp_Week > 1)
head(resp_weekly_synd_surveil_2020)
## Week_Ending_in_Date Syndrome Year Hosp_Week Count
## 1 2020-01-11 Respiratory 2020 2 13225
## 2 2020-01-18 Respiratory 2020 3 12968
## 3 2020-01-25 Respiratory 2020 4 13256
## 4 2020-02-01 Respiratory 2020 5 14123
## 5 2020-02-08 Respiratory 2020 6 13118
## 6 2020-02-15 Respiratory 2020 7 11117
resp_weekly_synd_surveil_validation_period =
resp_weekly_synd_surveil_2020 %>%
filter(Hosp_Week <=15) %>%
dplyr::select(Week = Hosp_Week,
Week_Ending_in_Date,
Actual_Resp_Count = Count) %>%
mutate(Week = as.numeric(Week))
validation_comp_df = G_w_2020_pred_df %>%
dplyr::select(Week = w_pred,
G_w_2020_pred) %>%
join(resp_weekly_synd_surveil_validation_period)
## Joining by: Week
validation_comp_melt_df = validation_comp_df %>%
melt(id.vars = c("Week", "Week_Ending_in_Date"))
p = ggplot(data = validation_comp_melt_df,
aes(x = Week, y = value,
color = variable)) +
geom_point() +
geom_line(aes(group = variable)) +
rahul_man_figure_theme +
theme_white_background
p
p = ggplot(data = validation_comp_melt_df,
aes(x = Week_Ending_in_Date, y = value,
color = variable)) +
geom_point() +
geom_line(aes(group = variable)) +
rahul_man_figure_theme +
theme_white_background
p
sim_data_G = data.frame(matrix(nrow = 0, ncol = 4))
sim_data_G_colnames = c("Sim",
"Week",
"Week_Ending_in_Date",
"G_w_2020_pred")
colnames(sim_data_G) = sim_data_G_colnames
w_pred = seq(from = 2, to = 15, by = 1)
Week_Ending_Date_list = unique(validation_comp_df$Week_Ending_in_Date)
N_sim = 100
for(sim_index in seq(1:N_sim)){
single_sim_df = data.frame(matrix(
nrow = length(Week_Ending_Date_list),
ncol = 4))
colnames(single_sim_df) = sim_data_G_colnames
single_sim_df$Week = seq(from = 2, to = 15, by = 1)
single_sim_df$Week_Ending_in_Date =
unique(validation_comp_df$Week_Ending_in_Date)
single_sim_df$Sim = sim_index
single_sim_df$G_w_2020_pred = g_0 +
g_F*NYC_region_confirmed_flucases_NY_state_2020$Confirmed_Flu_Cases +Beta_w_3*w_pred^3 +
Beta_w_2*w_pred^2 + Beta_w_1*w_pred
single_sim_df$G_w_2020_pred =
single_sim_df$G_w_2020_pred +
Beta_w_0
single_sim_df$G_w_2020_pred =
single_sim_df$G_w_2020_pred +
rnorm(n = 1,
mean = 0, sd = sigma_epsilon)
sim_data_G = rbind(sim_data_G, single_sim_df)
}
head(sim_data_G)
## Sim Week Week_Ending_in_Date G_w_2020_pred
## 1 1 2 2020-01-11 15700.53
## 2 1 3 2020-01-18 16694.32
## 3 1 4 2020-01-25 16965.61
## 4 1 5 2020-02-01 17011.10
## 5 1 6 2020-02-08 16274.12
## 6 1 7 2020-02-15 14410.28
sim_data_G_summary = sim_data_G %>%
group_by(Week, Week_Ending_in_Date) %>%
summarize(G_w_pred_2020_mean =
mean(G_w_2020_pred),
G_w_pred_2020_upper_CI =
quantile(G_w_2020_pred, 0.975),
G_w_pred_2020_lower_CI =
quantile(G_w_2020_pred, 0.025))
sim_start_date = as.Date("2020-03-01")
true_quarantine_start_time = as.Date("2020-03-18")
p = ggplot() +
geom_ribbon(data = sim_data_G_summary,
aes(x = Week_Ending_in_Date,
ymin =G_w_pred_2020_lower_CI,
ymax = G_w_pred_2020_upper_CI),
fill = 'blue', alpha = 0.5)+
geom_point(data = sim_data_G_summary,
aes(x = Week_Ending_in_Date,
y = G_w_pred_2020_mean),
color = 'blue') +
geom_line(data = sim_data_G_summary,
aes(x = Week_Ending_in_Date,
y = G_w_pred_2020_mean),
color = 'blue') +
geom_line(data = validation_comp_df,
aes(x = Week_Ending_in_Date,
y = Actual_Resp_Count),
color = 'red') +
geom_point(data = validation_comp_df,
aes(x = Week_Ending_in_Date,
y = Actual_Resp_Count),
color = 'red') +
rahul_man_figure_theme +
geom_vline(xintercept = sim_start_date, color = 'orange') +
geom_vline(xintercept = true_quarantine_start_time, color = 'blue') +
theme_white_background
p
png("../Figures/resp_reports_2020.png")
print(p)
dev.off()
## quartz_off_screen
## 2
write.csv(sim_data_G,
file = "../Generated_Data/simulated_non_COVID_data.csv",
append = FALSE,
row.names = FALSE)
## Warning in write.csv(sim_data_G, file = "../Generated_Data/
## simulated_non_COVID_data.csv", : attempt to set 'append' ignored
write.csv(NYC_region_confirmed_flucases_NY_state_2020,
file = "../Generated_Data/NYC_region_confirmed_flucases_NY_state_2020.csv",
row.names = FALSE)